1. Introduction

This document takes the QCd, normalized, scaled data and performs clustering.

#Install and load required R packages. Install the packages if you do not already have them installed
library(Seurat)
library(kableExtra)
library(ggplot2)
library(plotly)
library(tidyverse)
library(rmarkdown)

2. Load QC’d, normalized, scaled data

# Input path
datadirectory <- "/Users/nagarajanv/OneDrive - National Institutes of Health/Working/scRNAseqNilisha/"
# Set working directory
setwd(datadirectory)
# Load QCd data from current working directory
load('SeuratObjectAfterNormalization.RData')

3. PCA

##################### PCA
# Run PCA with 50 pcs
WT_TG_Filt_Scaled <- RunPCA(object = WT_TG_Filt_Scaled, npcs=50)
# Print genes in the top 5 pcs
print(WT_TG_Filt_Scaled[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  Cybb, Spi1, Csf1r, Clec4a3, Clec4a1 
## Negative:  AW112010, Trbc2, Ms4a4b, Nkg7, Il2rb 
## PC_ 2 
## Positive:  Il1rl1, Lmo4, Hes1, Lpcat2, Rnf128 
## Negative:  Cd79a, Ms4a1, Ebf1, Cd79b, Ly6d 
## PC_ 3 
## Positive:  Ms4a4b, Nkg7, Ccl5, Cd3g, Trac 
## Negative:  Cd79a, Ebf1, Ms4a1, Ly6d, Cd79b 
## PC_ 4 
## Positive:  Ccl5, Nkg7, Zeb2, Ms4a4b, Cx3cr1 
## Negative:  Wfdc21, Mmp9, Cd177, Itgb2l, Lcn2 
## PC_ 5 
## Positive:  Klrb1c, Ncr1, Fcer1g, Tyrobp, Styk1 
## Negative:  Cd3g, Ifi27l2a, Ly6a, Ifit3, Ifit1
# Identify number of PCs that explains majority of variations
ElbowPlot(WT_TG_Filt_Scaled, ndims=50, reduction = "pca")

# Visualize the genes in the first PC
VizDimLoadings(WT_TG_Filt_Scaled, dims = 1, ncol = 1) + theme_minimal(base_size = 8)

# Visualize the genes in the second PC
VizDimLoadings(WT_TG_Filt_Scaled, dims = 2, ncol = 1) + theme_minimal(base_size = 8)

# Set the identity column to WT vs TG
Idents(WT_TG_Filt_Scaled) <- "orig.ident"
# Visualize the cells after pca
DimPlot(object = WT_TG_Filt_Scaled, reduction = "pca")

# Plot heatmap with cells=500 plotting cells with extreme cells on both ends of spectrum
#DimHeatmap(object = WT_TG_Filt_Scaled, dims = 1:6, cells = 500, balanced = TRUE)
DimHeatmap(object = WT_TG_Filt_Scaled, dims = 2, cells = 500, balanced = TRUE)

3. Clustering

##################### Clustering
# use the first 20 pc's
use.pcs = 1:20
# FindNeighbors with 20 pcs
WT_TG_Filt_Scaled <- FindNeighbors(WT_TG_Filt_Scaled, reduction="pca", dims = use.pcs)
# FindClusters with resolution starting at 0.25 and ending at 4, with 0.5 increment
WT_TG_Filt_Scaled <- FindClusters(
  object = WT_TG_Filt_Scaled,
  resolution = seq(0.25,4,0.5),
  verbose = FALSE
)
head(WT_TG_Filt_Scaled)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCATCAACACCA-WT         WT       7477         2388     1.297312
## AAACGAAAGCTCTGTA-WT         WT       8471         2699     2.290166
## AAACGAACAGCTCTGG-WT         WT       9502         2658     2.694170
## AAAGGATCATGCCGAC-WT         WT      14984         2804     4.378003
## AAAGGGCAGGCCTAGA-WT         WT      10101         2653     2.910603
## AAAGTCCGTTCTTGCC-WT         WT       4780         1711     3.221757
## AAATGGAAGCTACTAC-WT         WT       6593         2292     1.956621
## AAATGGACAAGGTCAG-WT         WT       8548         2884     3.263921
## AACAAAGGTGTCACAT-WT         WT       7765         2287     2.974887
## AACAAGATCCTTCGAC-WT         WT       8894         2637     2.900832
##                     percent.ribo percent.hb RNA_snn_res.0.25 RNA_snn_res.0.75
## AAACCCATCAACACCA-WT     19.43293 0.00000000                7                8
## AAACGAAAGCTCTGTA-WT     17.91996 0.00000000                2                3
## AAACGAACAGCTCTGG-WT     33.86655 0.00000000                0                0
## AAAGGATCATGCCGAC-WT     38.70796 0.01334757                3                2
## AAAGGGCAGGCCTAGA-WT     32.60073 0.00000000                1                1
## AAAGTCCGTTCTTGCC-WT     30.69038 0.00000000                0                0
## AAATGGAAGCTACTAC-WT     17.54892 0.00000000                7                8
## AAATGGACAAGGTCAG-WT     15.65278 0.00000000                5                7
## AACAAAGGTGTCACAT-WT     31.59047 0.00000000                0                0
## AACAAGATCCTTCGAC-WT     26.14122 0.00000000                5                7
##                     RNA_snn_res.1.25 RNA_snn_res.1.75 RNA_snn_res.2.25
## AAACCCATCAACACCA-WT               10               15               17
## AAACGAAAGCTCTGTA-WT               12               14               16
## AAACGAACAGCTCTGG-WT                0                0                0
## AAAGGATCATGCCGAC-WT                3                7                7
## AAAGGGCAGGCCTAGA-WT                2                1                2
## AAAGTCCGTTCTTGCC-WT                5                2                5
## AAATGGAAGCTACTAC-WT               10               15               17
## AAATGGACAAGGTCAG-WT                9                8                8
## AACAAAGGTGTCACAT-WT                1                2                6
## AACAAGATCCTTCGAC-WT                9                8                8
##                     RNA_snn_res.2.75 RNA_snn_res.3.25 RNA_snn_res.3.75
## AAACCCATCAACACCA-WT               19               19               20
## AAACGAAAGCTCTGTA-WT               18               18               19
## AAACGAACAGCTCTGG-WT                7                9                7
## AAAGGATCATGCCGAC-WT                6                5                4
## AAAGGGCAGGCCTAGA-WT                1                0                0
## AAAGTCCGTTCTTGCC-WT                5                6               10
## AAATGGAAGCTACTAC-WT               19               19               20
## AAATGGACAAGGTCAG-WT                9                8                6
## AACAAAGGTGTCACAT-WT               13               14               11
## AACAAGATCCTTCGAC-WT                9                8                6
##                     seurat_clusters
## AAACCCATCAACACCA-WT              20
## AAACGAAAGCTCTGTA-WT              19
## AAACGAACAGCTCTGG-WT               7
## AAAGGATCATGCCGAC-WT               4
## AAAGGGCAGGCCTAGA-WT               0
## AAAGTCCGTTCTTGCC-WT              10
## AAATGGAAGCTACTAC-WT              20
## AAATGGACAAGGTCAG-WT               6
## AACAAAGGTGTCACAT-WT              11
## AACAAGATCCTTCGAC-WT               6
# Count number of clusters at each resolution
sapply(grep("res",colnames(WT_TG_Filt_Scaled@meta.data),value = TRUE),
       function(x) length(unique(WT_TG_Filt_Scaled@meta.data[,x])))
## RNA_snn_res.0.25 RNA_snn_res.0.75 RNA_snn_res.1.25 RNA_snn_res.1.75 
##                9               11               16               20 
## RNA_snn_res.2.25 RNA_snn_res.2.75 RNA_snn_res.3.25 RNA_snn_res.3.75 
##               22               24               25               26

4. tSNE

####################### TSNE
# Run tsne with 20 pcs
WT_TG_Filt_Scaled <- RunTSNE(
  object = WT_TG_Filt_Scaled,
  reduction.use = "pca",
  dims = use.pcs,
  do.fast = TRUE)

# Plot all clusters in all resolutions
DimPlot(object = WT_TG_Filt_Scaled, group.by=grep("res",colnames(WT_TG_Filt_Scaled@meta.data),value = TRUE)[1:4], ncol=2 , pt.size=3.0, reduction = "tsne", label = T)

# change default identity
Idents(WT_TG_Filt_Scaled) <- "RNA_snn_res.0.75"
# list cell number in each cluster for WT vs TG
table(Idents(WT_TG_Filt_Scaled),WT_TG_Filt_Scaled$orig.ident)
##     
##       TG  WT
##   0  380 143
##   1  163 229
##   2   89  68
##   3   90  64
##   4   71  64
##   5   87  30
##   6   64  52
##   7   50  44
##   8   23  39
##   9   27  29
##   10  16  19
# Visualize cluster at 0.75 resolution
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "tsne", label = T)

# Color by WT vs TG
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "tsne", group.by = "orig.ident" )

5. UMAP

########################## UMAP
# Run umap with 20 pcs
WT_TG_Filt_Scaled <- RunUMAP(
  object = WT_TG_Filt_Scaled,
  reduction.use = "pca",
  dims = use.pcs)
# Plot umap
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "umap", label = T)

# Plot umap with WT_TG group
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "umap", group.by = "orig.ident")

# Visualizations
# expression of variable genes across clusters
# Custom list of genes to visualize
#top10
mygenes = c("Lyz2","Cd74")
RidgePlot(WT_TG_Filt_Scaled, features = mygenes)

#VlnPlot(WT_TG_Filt_Scaled, features = top10)
VlnPlot(WT_TG_Filt_Scaled, features = mygenes)

#VlnPlot(WT_TG_Filt_Scaled, features = mygenes, split.by = "orig.ident")
FeaturePlot(WT_TG_Filt_Scaled, features = mygenes)

#FeaturePlot(WT_TG_Filt_Scaled, features = mygenes, split.by = "orig.ident")
DotPlot(WT_TG_Filt_Scaled, features = mygenes)

DoHeatmap(subset(WT_TG_Filt_Scaled), features = mygenes, size = 3)

VlnPlot(object = WT_TG_Filt_Scaled, features = "percent.mito", pt.size = 0.05)

save.image(file='SeuratObjectAfterClustering.RData')
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rmarkdown_2.9      forcats_0.5.1      stringr_1.4.0      dplyr_1.0.7       
##  [5] purrr_0.3.4        readr_1.4.0        tidyr_1.1.3        tibble_3.1.2      
##  [9] tidyverse_1.3.1    plotly_4.9.4.1     ggplot2_3.3.5      kableExtra_1.3.4  
## [13] SeuratObject_4.0.2 Seurat_4.0.3      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       systemfonts_1.0.2    
##   [4] plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2       
##   [7] splines_4.1.0         listenv_0.8.0         scattermore_0.7      
##  [10] digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0          
##  [13] magrittr_2.0.1        tensor_1.5            cluster_2.1.2        
##  [16] ROCR_1.0-11           globals_0.14.0        modelr_0.1.8         
##  [19] matrixStats_0.59.0    svglite_2.0.0         spatstat.sparse_2.0-0
##  [22] colorspace_2.0-2      rvest_1.0.0           ggrepel_0.9.1        
##  [25] haven_2.4.1           xfun_0.24             crayon_1.4.1         
##  [28] jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-11      
##  [31] zoo_1.8-9             glue_1.4.2            polyclip_1.10-0      
##  [34] gtable_0.3.0          webshot_0.5.2         leiden_0.3.8         
##  [37] future.apply_1.7.0    abind_1.4-5           scales_1.1.1         
##  [40] DBI_1.1.1             miniUI_0.1.1.1        Rcpp_1.0.6           
##  [43] viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20      
##  [46] spatstat.core_2.2-0   htmlwidgets_1.5.3     httr_1.4.2           
##  [49] RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2            
##  [52] pkgconfig_2.0.3       farver_2.1.0          sass_0.4.0           
##  [55] uwot_0.1.10           dbplyr_2.1.1          deldir_0.2-10        
##  [58] utf8_1.2.1            tidyselect_1.1.1      labeling_0.4.2       
##  [61] rlang_0.4.11          reshape2_1.4.4        later_1.2.0          
##  [64] munsell_0.5.0         cellranger_1.1.0      tools_4.1.0          
##  [67] cli_3.0.0             generics_0.1.0        broom_0.7.8          
##  [70] ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0        
##  [73] yaml_2.2.1            goftest_1.2-2         knitr_1.33           
##  [76] fs_1.5.0              fitdistrplus_1.1-5    RANN_2.6.1           
##  [79] pbapply_1.4-3         future_1.21.0         nlme_3.1-152         
##  [82] mime_0.11             xml2_1.3.2            compiler_4.1.0       
##  [85] rstudioapi_0.13       png_0.1-7             spatstat.utils_2.2-0 
##  [88] reprex_2.0.0          bslib_0.2.5.1         stringi_1.6.2        
##  [91] highr_0.9             RSpectra_0.16-0       lattice_0.20-44      
##  [94] Matrix_1.3-3          vctrs_0.3.8           pillar_1.6.1         
##  [97] lifecycle_1.0.0       spatstat.geom_2.2-0   lmtest_0.9-38        
## [100] jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0    
## [103] cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1         
## [106] patchwork_1.1.1       R6_2.5.0              promises_1.2.0.1     
## [109] KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.26.1    
## [112] codetools_0.2-18      MASS_7.3-54           assertthat_0.2.1     
## [115] withr_2.4.2           sctransform_0.3.2     mgcv_1.8-35          
## [118] parallel_4.1.0        hms_1.1.0             grid_4.1.0           
## [121] rpart_4.1-15          Rtsne_0.15            shiny_1.6.0          
## [124] lubridate_1.7.10
#https://ucdavis-bioinformatics-training.github.io/2021-March-Single-Cell-RNA-Seq-Analysis/data_analysis/scRNA_Workshop-PART4_fixed